ABCDEFGHIJKLMNOPQRSTUVWXYZAAABACADAEAFAGAHAIAJAKALAMANAOAPAQARASATAUAVAWAXAYAZBABBBCBDBEBFBGBHBIBJBKBLBMBNBOBPBQBRBSBTBUBVBWBXBYBZCACBCCCDCECFCGCHCICJCKCLCMCNCOCPCQCRCSCTCUCVCW
1
32FeatureMAP
32 x 32
2
ping pong bram! bram = simple bram; write a = !write b; Delay Line
3
idx012...n
4
case1case2ping
pong
bram
➡️datadatadata...data➡️pe➡️
5
(write a = true, b = false) (write a = true, b = false) ➡️➡️➡️...➡️
6
7
bram0bram1bram0bram1
8
addrdataaddr512addrdataaddr512
9
0x005120x005120x005120x00512PE (Mac Unit)
10
write➡️0x015120x015120x015120x01512
32x32
32
11
0x025120x025120x02512write➡️0x02512int8 featureMAPint8
12
0x035120x035120x035120x03512xX⬇️
13
0x04512read➡️0x045120x04512read➡️0x04512int8 Weightint8➡️int16➡️int8
14
read➡️0x055120x05512write➡️0x055120x05512⬇️↘️result
int32
15
0x065120x065120x065120x06512= INT16int8int16+d
e
l
a
y

L
i
n
e
16
0x075120x075120x075120x07512⬇️
17
0x085120x085120x085120x08512+ result int32result
int32
18
0x095120x095120x095120x09512= result int32
19
20
21
Delay Line
22
rms norm inv sqrt
23
16bit8bit32
Weight
32x1
➡️PE
0,0
PEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPE
0,31
24
rmsnorm_slope.memrmsnorm_inter.mem
25
ADDRValueAddrValue➡️PE
1,0
PEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPE
26
0x00000000477A0x100000FF
27
0x0001000047580x100104FA➡️PE
2,0
PEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPE
28
0x00020000474D0x100200FF
29
0x0003000047420x100304FA➡️PE
3,0
PEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPE
30
0x0004000047370x100400FF
31
0x00050000472C0x100504FAPE
4,0
PEPEPEPEPEPEPEPEPE0PEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPE
32
0x0006000047210x100610FF
33
0x0007000047160x100704FA➡️PEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPE0PEPEPEPEPEPEPEPEPEPEPE
34
0x00080000470B0x100800FF
35
0x0009000047370x100904FA➡️PEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPE
36
0x00100000472C0x101000FF
37
0x0011000047210x101104FA➡️PEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPE
38
0x0012000047160x101200FF
39
➡️PEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPE
40
41
PE
0,0
PEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPE
0,31
42
43
PEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPE
44
45
PEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPE
46
47
PEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPE
48
49
0PEPEPEPEPEPEPEPEPE0PEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPE
50
51
PEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPE0PEPEPEPEPEPEPEPEPEPEPE
52
53
PEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPE
54
55
PEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPE
56
57
PEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPE
58
59
PE
0,0
PEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPE
0,31
60
61
PEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPE
62
63
PEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPE
64
65
PEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPE
66
67
0PEPEPEPEPEPEPEPEPE0PEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPE
68
69
PEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPE0PEPEPEPEPEPEPEPEPEPEPE
70
71
PEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPE
72
73
PEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPE
74
75
PEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPE
76
77
PEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPE
78
79
0PEPEPEPEPEPEPEPEPE0PEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPE
80
81
PEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPE0PEPEPEPEPEPEPEPEPEPEPE
82
83
PEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPE
84
85
PEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPE
86
87
PEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPEPE
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
4bit2bit
12bit
9bit 3bit2bit
116
OPCODE
Control
Flags
REG addr set
Data Config
SIMD
options
Reserved
117
Override
Destination
Lane ID
Find Emax
118
CMD
Source1
Data Src
AlignMantissa
119
Chaining
Source2
Datatype 1
Accumulate
120
Chaining
Source 2 -
Datatype 2
121
122
123
124
125
126
127
128
129
130
131
132
133
gelu LUT
134
result ping pong bram0O_accO_accO_accO_accO_accO_accO_acc
135
gelu table
136
addrvalueo_acc
[0,0]
32bit
o_acc
[0,1]
32bit
...o_acc
[31,31]
32bit
O_accO_accO_accO_accO_accO_accO_accO_acc
137
10x00
138
gelu_table[x]20x01O_accO_accO_accO_accO_accO_accO_accO_acc
139
=30x01⬇️ flatten
140
outputint3240x02ram⬅️O_accO_accO_accO_accO_accO_accO_accO_acc
141
3276850x02o_acc [0,0]o_acc [0,1]...
142
60x0332bit length32bit length...···중 간 생 략···
143
......
144
327680xFF⬇️ cpu(ram)으로 보내기O_accO_accO_accO_accO_accO_accO_accO_acc
145
146
O_accO_accO_accO_accO_accO_accO_accO_acc
147
148
softmaxexpunitO_accO_accO_accO_accO_accO_accO_accO_acc
149
150
O_accO_accO_accO_accO_accO_accO_acc1024